% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%           ESTIMATION OF STUDENT PREFERENCES UNDER CONSTRAINED DA - SIMULATED DATA               %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

cd(folder_data)
load simulated_data 

School_char_orig = School_char; % IxJxM
School_small_orig = School_small; % IxJxM

sim_data = sim_data_Mis;
M = NSim*P; % total number of datasets
School_char = repmat(School_char_orig,1,1,P); % IxJxM*P 
School_small = repmat(School_small_orig,1,1,P); % IxJxM*P 

% Load simulated data
simulated_data = sim_data;

% --------------------- %
% True parameter values %
% --------------------- %
coeff_small = 0;

theta_true = [coeff_fe; coeff_char; coeff_dist; coeff_small];


% Number of parameters
nb_params = size(theta_true,1);

% --------------- %
% OPTIONS FOR MLE %
% --------------- %

% Specify here the tolerance parameters for MLE (fminunc and fminsearch) used in main estimation
TolFun_MLE = 1e-7;
TolX_MLE = 1e-7;

% ------------------------------------ %
% Parameters of optimization algorithm %
% ------------------------------------ %

optfminunc = optimoptions(@fminunc, 'display','none',...
    'Algorithm','quasi-newton',...
    'MaxIter',5e+4,...
    'MaxFunEvals',5e+4, ...
    'TolFun',TolFun_MLE,... 
    'TolX',TolX_MLE);

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%                                   PART I: ESTIMATION                                            %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

% Start timer
tic

% To monitor progress of parfor loops
func_parfor_progress(M);

% Loop over MC samples
parfor mm=1:M
   
    % ----------------------------------------------------------------------------------------------- %
    %                                         1/ PREPARE DATA                                         %
    % ----------------------------------------------------------------------------------------------- %
    
    % Retrieve MC sample from master data
    full_sample=simulated_data(simulated_data(:,1)==mm,:);
    
    % Student ids
    stu_id = full_sample(:,2);
    
    % School ids
    school_id = full_sample(:,3);
    
    % School dummy variables
    school_id_dum=[];
    for ss=1:J
        school_id_dum(:,ss) = (school_id==ss);
    end
    
    % Observed cutoffs
    observed_cutoffs=full_sample(1:J,8);
    
    % True ranking of schools
    sch_true_ranking = full_sample(:,4);
    
    % Submitted rankings of schools
    sch_ranking = full_sample(:,5);
    
    % Feasible schools
    sch_feasible = full_sample(:,9);
    
    % Assigned school (dummy variable)
    sch_assignment = full_sample(:,10);
    
    % List of assigned students
    list_assigned_stu = stu_id(sch_assignment==1,:);
    
    % Number of assigned students (scalar)
    IA=numel(list_assigned_stu);
    
    % logical array that selects assigned students from initial data
    select_assigned = ismember(stu_id,list_assigned_stu);
    
    % Matrix of dummy variables for students asigned school (IA x J)
    assignment = reshape(sch_assignment(select_assigned),J,[])';
    
    % Distance to schools
    distance_school = full_sample(:,12);
    distance_school_samp = reshape(distance_school(select_assigned),J,[])';
    distance_school_full = reshape(distance_school,J,[])';
    
    % Special School
    % sch_char = repmat(School_char',[I 1]);
    sch_char = repmat(School_char(1,:,mm)',[I 1]);
    
    % Small School
    sch_small = repmat(School_small(1,:,mm)',[I 1]);
    
    % Student char
    stu_char = full_sample(:,23);
    stu_char_samp = reshape(stu_char(select_assigned),J,[])';
    stu_char1_samp = stu_char_samp(:,2);
    
    stu_char1_full = stu_char(school_id==1);
    
    % Own char x school char
    sch_stu_char = stu_char .* sch_char;
    
    % Covariates
    covariates_full=[];
    % covariates_samp=[];
    % covariates_assigned=[];
    
    covariates = school_id_dum;
    if coeff_char ~= 0
        covariates = horzcat(covariates, sch_stu_char);
    end
    if coeff_dist ~= 0
        covariates = horzcat(covariates, distance_school);
    end
    covariates = horzcat(covariates, sch_small);
    
    nb_covariates = size(covariates,2);
    
    for cc=1:nb_covariates
        covariates_full(:,:,cc)= reshape(covariates(:,cc),J,[])';
    end
    covariates_samp = covariates_full(list_assigned_stu,:,:);
    covariates_assigned = permute(sum(covariates_samp.*repmat(assignment,1,1,nb_covariates),2),[1 3 2]);
    
    % Ex post feasible schools (IA x J matrix of dummy variables)
    feasible=reshape(sch_feasible(select_assigned),J,[])';
    feasible_full=reshape(sch_feasible,J,[])';
    
    % Students' submitted ranking of schools (IA,5). N.B. 0 if not listed
    sub_rks = reshape(sch_ranking,J,[])';
    true_rks = reshape(sch_true_ranking,J,[])';
    
    % ----------------------------------------------------------------------------------------------- %
    %                         2/ ESTIMATE MODEL USING DIFFERENT APPROACHES                            %
    % ----------------------------------------------------------------------------------------------- %
    
    % --------------- %
    % STARTING VALUES %
    % --------------- %
    
    % i) True theta
    start_val1 = theta_true;
    % ii) Vector of zeros
    start_val2 = zeros(nb_params,1);
    % (iii) Vector of random values
    rng(1000)
    start_val3 = rand(nb_params,1);
       
    % ---------------- %
    % 1/ TRUTH-TELLING %
    % ---------------- %
    
    % Method: MLE (rank ordered/exploded logit)
    
    % Choice set: all schools (exploded)
    choice_set_TT = [];
    block_TT = [];
    for jj=1:J
        if jj==1
            choice_set_TT = (true_rks>=0);
            block_TT = choice_set_TT;
        else
            block_TT = block_TT-(sub_rks==jj-1);
            choice_set_TT = vertcat(choice_set_TT, block_TT);
        end
    end
    
    % Choices: top ranked school within exploded choice sets (1/0)
    choices_TT=[];
    for jj=1:J
        if jj==1
            choices_TT =(sub_rks==jj);
        else
            choices_TT = [choices_TT; sub_rks==jj];
        end
    end
    
    % Starting values
    start_val_TT = [start_val1, start_val2, start_val3];
    
    % Objective function: log-likelihood
    f_TT = @(x)func_LL(x,choices_TT,choice_set_TT,covariates_full);
    
    % Point estimates / s.e.
    LL_TT_est_= NaN(nb_params,3);
    LL_TT_fval_ = NaN(1,3); 
    for ss=1:3 % loop over starting values
        % Estimates
        [LL_TT_est_(:,ss), LL_TT_fval_(:,ss), ~, ~, ~, ~] = fminunc(f_TT,start_val_TT(:,ss),optfminunc);
    end
    
    % Keep the estimate that minimizes the likelihood function
    [~, id_LL_TT] = min(LL_TT_fval_);
    LL_TT_est(:,mm) = LL_TT_est_(:,id_LL_TT);
        
    % ---------------------------- %
    % 2/ STABILITY                 %
    % ---------------------------- %
    
    % Estimation method: MLE (conditional logit)
       
    % Choice set: ex post feasible schools
    choice_set_feas = feasible;
    
    % Choices: assigned school (1/0)
    choices_feas = assignment;
    
    % Starting values
    start_val_ST = [start_val1, start_val2, start_val3];
    
    % Objective function: log-likelihood
    f_ST = @(x)func_LL(x,choices_feas,choice_set_feas,covariates_samp);
    
    % Point estimates / s.e.
    LL_ST_est_= NaN(nb_params,3);
    LL_ST_fval_ = NaN(1,3);
    for ss=1:3 % loop over starting values
        % Estimates
        [LL_ST_est_(:,ss), LL_ST_fval_(:,ss), ~, ~, ~, ~] = fminunc(f_ST,start_val_ST(:,ss),optfminunc);
    end
    
    % Keep the estimate that minimizes the likelihood function
    [~, id_LL_ST] = min(LL_ST_fval_);
    LL_ST_est(:,mm) = LL_ST_est_(:,id_LL_ST);   
    
    % Update parfor monitor
    func_parfor_progress;
    
end

% Close parfor monitor
func_parfor_progress(0);
delete parfor_progress.txt

%%

% ----------------------------------------------------------------------------------------------- %
%                                                                                                 %
%                                 PART II : SUMMARY OF RESULTS                                    %
%                                                                                                 %
% ----------------------------------------------------------------------------------------------- %

M = NSim;

tab_est = table('Size',[8,11], ...
    'VariableTypes',{'double','double','double','double','double','double','double','double','double','double','double'},...
    'VariableNames',{'Quality (mean)','Quality (s.d.)','|','Distance (mean)','Distance (s.d.)','"',...
    'Interaction (mean)', 'Interaction (s.d.)', ';', 'Small (mean)', 'Small (s.d.)'},...
    'RowNames',{'DGP TT: WTT','DGP TT: Stability','-','DGP PIM: WTT','DGP PIM: Stability','~',...
    'DGP PRM: WTT','DGP PRM: Stability'});

for pp = 1:P
    for kk = 1:4
        % -------------------------- %
        % 1/ MEAN OF POINT-ESTIMATES %
        % 2/ S.D. OF POINT-ESTIMATES %
        % -------------------------- %
        % WTT
        tab_est(3*(pp-1)+1, (kk-1)*3+1) = table(mean(LL_TT_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1))));
        tab_est(3*(pp-1)+1, (kk-1)*3+2) = table(std(LL_TT_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1))));
        
        % Stability
        tab_est(3*(pp-1)+2, (kk-1)*3+1) = table(mean(LL_ST_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1))));
        tab_est(3*(pp-1)+2, (kk-1)*3+2) = table(std(LL_ST_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1))));
    end
end

% tab_est = NaN(8,11);
% 
% for pp = 1:P
%     for kk = 1:4
%         % -------------------------- %
%         % 1/ MEAN OF POINT-ESTIMATES %
%         % 2/ S.D. OF POINT-ESTIMATES %
%         % -------------------------- %
%         % WTT
%         tab_est(3*(pp-1)+1, (kk-1)*3+1) = mean(LL_TT_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1)));
%         tab_est(3*(pp-1)+1, (kk-1)*3+2) = std(LL_TT_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1)));
%         
%         % Stability
%         tab_est(3*(pp-1)+2, (kk-1)*3+1) = mean(LL_ST_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1)));
%         tab_est(3*(pp-1)+2, (kk-1)*3+2) = std(LL_ST_est(kk,1+NSim*(pp-1):NSim+NSim*(pp-1)));
%     end
% end


disp('Estimates: mean, s.d., ');
disp(tab_est);

% save table E1
cd(folder_tables)
writetable(tab_est,'tab_E1.csv','WriteRowNames',true)

% save the dataset
cd(folder_data)
save est_Mis LL_TT_est LL_ST_est ...
    tab_est theta_true M P J I

% end timer
toc
